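# Example scraper: fetch a Sohu article page, extract every image URL listed in its embedded page data, and save the images locally.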

import json
import os.path
import requests
from lxml import etree

# Article page whose embedded image list is downloaded by this script.
url = 'https://www.sohu.com/a/732296620_121660528'

# Browser-like headers sent with every request made through the session.
# NOTE(review): the referer points at a Sina blog page while the target URL
# is on sohu.com -- confirm this is intentional.
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    'referer': 'https://blog.sina.com.cn/s/articlelist_32230282_1_1.html',
}

# One shared session is used for the page request and the image downloads.
session = requests.session()
# A timeout keeps the script from hanging forever on a stalled connection.
response = session.get(url, headers=header, timeout=30)
# Fail fast on an HTTP error instead of parsing an error page as the article.
response.raise_for_status()
# Force UTF-8 decoding of the body regardless of the declared charset.
response.encoding = 'utf8'

# Create the output directory for the downloaded images. exist_ok=True makes
# this a single idempotent call, avoiding the check-then-create race of a
# separate os.path.exists() test followed by os.mkdir().
os.makedirs('孟子义图片集', exist_ok=True)

# Parse the fetched HTML so it can be queried with XPath.
tree = etree.HTML(response.text)

# Text of the first <span> text node found under an <h1> inside a <div>.
# Fall back to an empty string when nothing matches so that a layout change
# does not abort the script with IndexError.
title_nodes = tree.xpath('//div//h1//span//text()')
title = title_nodes[0] if title_nodes else ''

# Second text node found under any <script> element; the code further down
# searches this text for the 'imgsList' marker.
# NOTE(review): relying on position 1 is tied to the current page layout.
# An empty string is used when fewer than two script text nodes exist, which
# makes the later marker search report "not found" instead of crashing here.
script_texts = tree.xpath('//script//text()')
title2 = script_texts[1] if len(script_texts) > 1 else ''


#print(f'标题: {str(title2)}')

# Locate the image-list literal inside the script text: it begins at the
# 'imgsList' marker and ends shortly before the 'topNavigation' marker.
start = title2.find('imgsList')
# Look for the end marker only after the start marker, so an earlier
# occurrence of 'topNavigation' cannot yield an empty or inverted slice.
end = title2.find('topNavigation', start) if start != -1 else -1
if start != -1 and end != -1:
    # Skip 'imgsList' plus the one character after it, and drop the two
    # characters that precede 'topNavigation'.
    result = title2[start + 9:end - 2]
    result = result.replace(' ', '')

    # The steps below patch the extracted text into parseable JSON. They are
    # tied to the exact page markup and are kept exactly as they were.
    # NOTE(review): presumably the first 9 characters are the opening
    # brackets plus the first 'url' key, re-added in JSON form below -- confirm
    # against a live page.
    result = result[9:len(result) - 2]
    # Insert a dummy key/value pair before every closing brace.
    # NOTE(review): presumably this absorbs a trailing comma that strict JSON
    # would reject -- confirm.
    result = result.replace('}', '"aa":"22"}')
    result = '[{"url"' + result
    python_obj = json.loads(result)

    url_list = [item['url'] for item in python_obj if 'url' in item]
    print(url_list)

    # Files are numbered from 1 by position in url_list, so a failed download
    # leaves a gap rather than shifting the numbers of later images.
    for n, imgurl in enumerate(url_list, start=1):
        # Only protocol-relative URLs ('//host/path') need a scheme added.
        # A blanket replace('//', 'https://') would also rewrite any '//'
        # later in the URL and break URLs that already carry a scheme.
        realimgurl = 'https:' + imgurl if imgurl.startswith('//') else imgurl
        print(realimgurl)
        try:
            image_response = session.get(url=realimgurl, headers=header, timeout=30)
            # Do not save an HTTP error body as if it were an image.
            image_response.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: report the failure and continue with the next image.
            print(f'图片{n}下载失败: {exc}')
            continue
        with open(f'./孟子义图片集/{n}.孟子义.png', 'wb') as f:
            f.write(image_response.content)
        print(f'图片{n}写入完成!')
else:
    print("未找到指定字符串")